** Producing Figure 1
** Download the 5% decennial US Census samples for 1970, 1980, 1990 and 2000 from IPUMS, together with the American Community Survey for 2007, 2010 and 2015.

* Figure 1: weighted counts (in millions) of working-age foreign-born persons by year,
* overall and for two schooling groups, drawn on a log scale.
cd "C:\Users\your_directory"
use "acs1970-2015.dta", clear
append using "acs2007.dta"

* Working-age sample: ages 18 to 64.
keep if inrange(age, 18, 64)

* Foreign born = birthplace code above 140 and citizen code above 1; everyone else is dropped.
generate foreign = (bpl>140 & citizen>1)
replace bpld = 1 if foreign==0
keep if foreign==1

* Person weight carried only by members of each schooling group
* (hsl: educ<=6, plotted as "High school and less"; lhs: educ<=5, "Less than high school").
generate hsl = perwt*(educ<=6)
generate lhs = perwt*(educ<=5)

* Yearly totals, rescaled from persons to millions of persons.
collapse (rawsum) perwt hsl lhs, by(year)
foreach total of varlist perwt hsl lhs {
    replace `total' = `total'/1000000
}

twoway (line perwt year, lpattern(vshortdash) lcolor(blue) lwidth(medthick) ) ///
       (line hsl year, lpattern(longdash) lcolor(red) lwidth(medium) ) ///
       (line lhs year, lpattern(solid) lwidth(medium) lcolor(green) ), ///
       legend(label(1 All) label(2 High school and less)  label(3 Less than high school) ring(10) position(6) col(2) size(medium) ) ///
       xtitle("Year") ytitle("Population (millions)") title("") ///
       xlab(1970 1980 1990 2000 2007 2015, nogrid) ylab(1 2 5 10 20 30, angle(h) nogrid) ///
       graphregion(color(white) ) yscale(log)
graph export "fig1.pdf", as(pdf) replace

******************************************************************************************************************
******************************************************************************************************************

** Producing Figure 2, the bar chart 
** Download the 5% decennial US Census samples for 1980, 1990 and 2000 from IPUMS, together with the American Community Survey for 2010.


*1) First produce the top panel for Mexican immigrants, and save it as "mexico.pdf".

* Top panel of Figure 2: Mexican-born immigrants with educ<=6, in thousands,
* by birth cohort, age at arrival and census year. Output: mexico.pdf.
use "acs1970-2010.dta", clear

keep if age>=1 & age<=70
keep if educ<=6

* Foreign born = birthplace code of 150 or more and citizen code above 1.
gen foreign=0
replace foreign=1 if bpl>=150 & citizen>1
replace bpld=1 if foreign==0
keep if foreign==1

* Origin indicators built from the detailed birthplace code (only mex is plotted in this panel).
gen mex=1*(bpld==20000 & foreign==1)
gen latin=1*(bpld>=21000 & bpld<=31000 & foreign==1)
gen asia=1*(bpld>=50000 & bpld<=52400 & foreign==1)

* Express person weights in thousands, then turn each indicator into a weighted count.
replace perwt=perwt/1000

replace mex=mex*perwt
replace latin=latin*perwt
replace asia=asia*perwt

* Year of birth implied by census year and age.
gen yob=year-age

*gen birth="birth cohort 1940-1949" if yob>=1940 & yob<=1949
gen birth="birth cohort 1950-1959" if yob>=1950 & yob<=1959
replace birth="birth cohort 1960-1969" if yob>=1960 & yob<=1969
replace birth="birth cohort 1970-1979" if yob>=1970 & yob<=1979

* Age at arrival = current age minus years elapsed since immigration.
gen aa=age-(year-yrimmig)

gen aoa="arrival age 0-10" if aa>=0 & aa<=10
replace aoa="arrival age 11-20" if aa>=11 & aa<=20
replace aoa="arrival age 21-30" if aa>=21 & aa<=30

* Observations outside the plotted cohorts / arrival ages are discarded.
drop if birth==""
drop if aoa==""

collapse (rawsum)mex latin asia, by (birth year aoa)

* One column per census year so that each year becomes its own bar.
reshape wide mex latin asia, i(aoa birth) j(year)

* The graph command is continued with /// rather than switching the statement delimiter:
* the original changed the delimiter with a bare "#delimit" and never restored it with
* "#delimit cr", which leaves every later line of the do-file waiting for a ";".
graph bar mex*, ///
    over(aoa, gap(30) sort(l) label(labcolor(black) angle(30) labsize(vsmall) ) ) ///
    over(birth, gap(360) sort(l) label(labcolor(black) angle(30) labsize(small) ) ) ///
    bar(1, fcolor(ltblue) fintensity(inten100)) bar(2, fcolor(midblue) fintensity(inten100)) bar(3, fcolor(blue) fintensity(inten100)) bar(4, fcolor(navy) fintensity(inten100)) bar(5, fcolor(dknavy) fintensity(inten100)) ///
    legend(label(1 1970) label(2 1980) label(3 1990) label(4 2000) label(5 2010) ring(1) position(12) col(5) size(vsmall)) ///
    ytitle("Number of migration in thousands") ylab(0(500)2000, angle(h) nogrid) graphregion(fcolor(white))

graph export "mexico.pdf", as(pdf) replace

#delimit cr
*2) Second, produce the middle panel for immigrants from Latin American countries, and save it as "latin.pdf".
* (The "#delimit cr" above guarantees carriage-return delimiting even if an earlier section left ";" active.)

use "acs1970-2010.dta", clear

keep if age>=1 & age<=70
keep if educ<=6

* Foreign born = birthplace code of 150 or more and citizen code above 1.
gen foreign=0
replace foreign=1 if bpl>=150 & citizen>1
replace bpld=1 if foreign==0
keep if foreign==1

* Origin indicators built from the detailed birthplace code (only latin is plotted in this panel).
gen mex=1*(bpld==20000 & foreign==1)
gen latin=1*(bpld>=21000 & bpld<=31000 & foreign==1)
gen sea=1*(bpld>=51000 & bpld<=52400 & foreign==1)

* Express person weights in thousands, then turn each indicator into a weighted count.
replace perwt=perwt/1000

replace mex=mex*perwt
replace latin=latin*perwt
replace sea=sea*perwt

* Current-age groups.
gen ageg="age 15-24" if age>=15 & age<=24
replace ageg="age 25-34" if age>=25 & age<=34
replace ageg="age 35-44" if age>=35 & age<=44

* Age at arrival = current age minus years elapsed since immigration.
gen aa=age-(year-yrimmig)

gen aoa="arrival age 05-14" if aa>=5 & aa<=14
replace aoa="arrival age 15-24" if aa>=15 & aa<=24
replace aoa="arrival age 25-34" if aa>=25 & aa<=34
replace aoa="arrival age 35-44" if aa>=35 & aa<=44

* Observations outside the plotted age / arrival-age groups are discarded.
drop if ageg==""
drop if aoa==""

collapse (rawsum)mex latin sea, by (ageg year aoa)

* One column per census year so that each year becomes its own bar.
reshape wide mex latin sea, i(aoa ageg) j(year)

* NOTE(review): the legend labels start at 1980. If acs1970-2010.dta contributes 1970
* observations to this panel, latin1970 becomes bar 1 and every label is shifted by one
* census year - confirm against the data before publishing the figure.
* The command is continued with /// instead of relying on a ";" delimiter that was set
* with a bare "#delimit" and never reset.
graph bar latin* , over(aoa, gap(100) label(labcolor(black) angle(30) labsize(vsmall) )   )  nofill   ///
    over(ageg, gap(360) label(labcolor(black) angle(0) labsize(small) ) ) ///
    bar(1, fcolor(ltblue) fintensity(inten100)) bar(2, fcolor(midblue) fintensity(inten100)) bar(3, fcolor(blue) fintensity(inten100)) bar(4, fcolor(navy) fintensity(inten100)) bar(5, fcolor(dknavy) fintensity(inten100)) ///
    legend(label(1 1980) label(2 1990) label(3 2000) label(4 2010) ring(1) position(12) col(5) size(vsmall)) ///
    ytitle("Number of migration in thousands") ylab(0(200)800, angle(h) nogrid) graphregion(fcolor(white))

graph export "latin.pdf", as(pdf) replace

#delimit cr
*3) Finally, produce the bottom panel for immigrants from Southeast Asian countries, and save it as "sea.pdf".
* (The "#delimit cr" above guarantees carriage-return delimiting even if an earlier section left ";" active.)

use "acs1970-2010.dta", clear

keep if age>=1 & age<=70
keep if educ<=6

* Foreign born = birthplace code of 150 or more and citizen code above 1.
gen foreign=0
replace foreign=1 if bpl>=150 & citizen>1
replace bpld=1 if foreign==0
keep if foreign==1

* Origin indicators built from the detailed birthplace code (only sea is plotted in this panel).
gen mex=1*(bpld==20000 & foreign==1)
gen latin=1*(bpld>=21000 & bpld<=31000 & foreign==1)
gen sea=1*(bpld>=51000 & bpld<=52400 & foreign==1)

* Express person weights in thousands, then turn each indicator into a weighted count.
replace perwt=perwt/1000

replace mex=mex*perwt
replace latin=latin*perwt
replace sea=sea*perwt

* Current-age groups.
gen ageg="age 15-24" if age>=15 & age<=24
replace ageg="age 25-34" if age>=25 & age<=34
replace ageg="age 35-44" if age>=35 & age<=44

* Age at arrival = current age minus years elapsed since immigration.
gen aa=age-(year-yrimmig)

gen aoa="arrival age 05-14" if aa>=5 & aa<=14
replace aoa="arrival age 15-24" if aa>=15 & aa<=24
replace aoa="arrival age 25-34" if aa>=25 & aa<=34
replace aoa="arrival age 35-44" if aa>=35 & aa<=44

* Observations outside the plotted age / arrival-age groups are discarded.
drop if ageg==""
drop if aoa==""

collapse (rawsum)mex latin sea, by (ageg year aoa)

* Numeric arrival-age group, used as the reshape index so that each arrival-age group
* (rather than each census year) becomes its own bar.
gen group=1 if aoa=="arrival age 05-14"
replace group=2 if aoa=="arrival age 15-24"
replace group=3 if aoa=="arrival age 25-34"
replace group=4 if aoa=="arrival age 35-44"

drop aoa
reshape wide mex latin sea, i(ageg year) j(group)

* The command is continued with /// and the file stays in carriage-return mode afterwards.
* In the original the ";" delimiter was never reset after this export, so the comment
* banner that follows would have swallowed the code up to the next ";" in the file.
graph bar sea* , over(year, gap(100) label(labcolor(black) angle(30) labsize(vsmall) )   )  nofill   ///
    over(ageg, gap(360) label(labcolor(black) angle(0) labsize(small) ) ) ///
    bar(1, fcolor(dknavy) fintensity(inten100)) bar(2, fcolor(blue) fintensity(inten100)) bar(3, fcolor(midblue) fintensity(inten100)) bar(4, fcolor(ltblue) fintensity(inten100)) bar(5, fcolor(dknavy) fintensity(inten100)) ///
    legend(label(1 arrival age 5-14) label(2 arrival age 15-24) label(3 arrival age 25-34) label(4 arrival age 35-44) ring(1) position(12) col(2) size(vsmall)) ///
    ytitle("No. of working-age immigrants in thousands") ylab(0(100)300, angle(h) nogrid) graphregion(fcolor(white))

graph export "sea.pdf", as(pdf) replace

******************************************************************************************************************
******************************************************************************************************************
*The following code produces the left panel of Figure 3. The analysis is based on the Current Population Survey (CPS), years 1994-2015.

* Left panel of Figure 3: share of the summed CPS weight (wtsupp) held by each
* low-skill immigrant group, by year.
* Every pass reloads the CPS file, applies identical sample restrictions, sums wtsupp
* by year x skill flag x origin flag and divides by the yearly total:
*   pass 1: lessh x foreign -> phi1, saved as 1.dta
*   pass 2: lessh x mex     -> phi3, saved as 2.dta
*   pass 3: hless x foreign -> phi2, saved as 3.dta
*   pass 4: hless x mex     -> phi4, kept in memory as the merge master
local skillvars  lessh lessh hless hless
local originvars foreign mex foreign mex
local sharevars  phi1 phi3 phi2 phi4

forvalues pass = 1/4 {
    local skill  : word `pass' of `skillvars'
    local origin : word `pass' of `originvars'
    local share  : word `pass' of `sharevars'

    use "cps9415.dta", clear
    keep if inrange(age, 18, 64)
    drop if inlist(educ, 0, 1, 999)
    drop if wtsupp<=0

    * Origin flags: foreign born (bpl>14900 and citizen>1) and Mexican born (bpl==20000).
    generate foreign = (bpl>14900 & citizen>1)
    generate mex = (bpl==20000)

    * Skill flag: lessh is educ below 70, hless is educ up to and including 73.
    if "`skill'"=="lessh" {
        generate lessh = (educ>=1 & educ<70)
    }
    else {
        generate hless = (educ>=1 & educ<=73)
    }

    collapse (rawsum) wtsupp, by(year `skill' `origin')
    egen t_hrs = total(wtsupp), by(year)
    generate `share' = wtsupp/t_hrs
    keep if `origin'==1 & `skill'==1

    if `pass'<4 {
        save "`pass'.dta", replace
    }
}

* Bring the three saved series alongside phi4, one row per year.
forvalues piece = 1/3 {
    merge 1:1 year using "`piece'.dta"
    drop _merge
}

* Plot against the year preceding the survey year.
replace year = year-1

twoway (line phi1 year, lpattern(vshortdash) lcolor(blue) ) ///
       (line phi2 year, lpattern(longdash) lcolor(red) ) ///
       (line phi3 year, lpattern(shortdash) lcolor(green) ) ///
       (line phi4 year, lpattern(line) lcolor(pink) ), ///
       legend(label(1 Foreign-born less than high school) label(2 Foreign-born high school and less) label(3 Mexican-born less than high school) label(4 Mexican-born high school and less) ring(10) position(6) col(2) size(vsmall) ) ///
       xtitle("Year") ytitle("") title("Raw share of low-skilled immigrants") ///
       xlab(1993(3)2015, nogrid) ylab(0(0.05)0.2, angle(h) nogrid) graphregion(color(white) )
graph export "1.pdf", as(pdf) replace

*The following code produces the right panel of Figure 3. We compute the productivity-equivalent-unit (PEU) weights by following the exact procedure in Autor, Katz and Kearney (2008, ReStat).
*pce.dta holds the personal consumption expenditures (PCE) price index, collected from Bureau of Labor Stat.
* Right panel of Figure 3: the same shares as the left panel, but with each person's
* CPS weight multiplied by a productivity-equivalent-unit (PEU) factor.
* Intermediate files written: cps9415+.dta, x1.dta, x2.dta, peu.dta, cps9415++.dta, 1-3.dta.

* ---- Step 1: person-level file with schooling years, experience cells and hourly earnings ----
use "cps9415.dta", clear

* Years of schooling assigned from educ99.
generate years = 9 if educ99<=6
replace years = 10   if educ99==7
replace years = 11   if educ99==8
replace years = 12   if inlist(educ99, 9, 10)
replace years = 13.5 if educ99==11
replace years = 14   if inlist(educ99, 12, 13, 14)
replace years = 16   if educ99==15
replace years = 17.5 if inlist(educ99, 16, 17)
replace years = 18   if educ99==18

keep if inrange(age, 18, 64)
drop if inlist(educ, 0, 1, 999)
drop if wtsupp<=0

* Potential experience: age - schooling - 6, capped at age - 16 and floored at zero.
gen exp=age-years-6
replace exp=age-16 if age-16<exp
replace exp=0 if exp<0
keep if exp>=0 & exp<=39

* Five-year experience cells 1..8. The cells are closed integer intervals, so a
* half-year value such as 4.5 (possible when years is 13.5 or 17.5) matches no cell,
* gets exper==0 and is removed by the drop below.
generate exper = 1*inrange(exp, 0, 4)   + 2*inrange(exp, 5, 9)   ///
               + 3*inrange(exp, 10, 14) + 4*inrange(exp, 15, 19) ///
               + 5*inrange(exp, 20, 24) + 6*inrange(exp, 25, 29) ///
               + 7*inrange(exp, 30, 34) + 8*inrange(exp, 35, 39)
drop if exper==0

generate black = (race==200)

* Education cells: edu1 has four groups, edu2 merges the two lowest ones.
gen edu1=1* (educ>1 & educ<71)+2* (educ>70 & educ<=73) + 3*(educ>73 & educ<110) + 4*(educ>=111)
gen edu2=1* (educ>1 & educ<=73) + 2*(educ>73 & educ<110) + 3*(educ>=111)
drop if edu1==0 | edu2==0

generate foreign = (bpl>14900 & citizen>1)
generate mex = (bpl==20000)

* Attach the yearly price index and compute earnings per hour deflated by pce.
merge m:1 year using "pce.dta"
drop _merge
gen earn = incwage/(wkswork1*uhrswork1*pce)
save "cps9415+.dta", replace

* ---- Step 2: mean hourly earnings by year x education x experience x sex x race ----
* Restricted to wkswork1>=35, uhrswork1 in 35..99, incwage below 9999990,
* and classwkr other than 10, 13 or 14.
keep if wkswork1>=35
keep if uhrswork1>=35 & uhrswork1<=99
drop if incwage>=9999990
drop if inlist(classwkr, 10, 13, 14)
collapse (mean) earn [aw=wtsupp], by(year edu1 exper sex black)
save "x1.dta", replace

* ---- Step 3: reference earnings (years==12, sex==1, black==0, experience 8-12), weeks-weighted ----
use "cps9415+.dta", clear
keep if years==12 & sex==1 & black==0
keep if exp>=8 & exp<=12
replace wtsupp=wtsupp*wkswork1
collapse (mean) earn [aw=wtsupp], by(year)
rename earn earn_hs
sort year
save "x2.dta", replace

* ---- Step 4: PEU = cell earnings relative to the reference group, averaged over years ----
use "x1.dta", clear
sort year
merge m:1 year using "x2.dta"
drop _merge
gen peu = earn/earn_hs
collapse (mean)peu, by(exper edu1 sex black)
save "peu.dta", replace

* ---- Step 5: attach the PEU factor to every person ----
use "cps9415+.dta", clear
merge m:1 exper edu1 sex black using "peu.dta"
drop _merge
save "cps9415++.dta", replace

* ---- Step 6: PEU-weighted shares, same four passes as the left panel ----
*   pass 1: lessh x foreign -> phi1 (1.dta)    pass 2: lessh x mex -> phi3 (2.dta)
*   pass 3: hless x foreign -> phi2 (3.dta)    pass 4: hless x mex -> phi4 (in memory)
local skillvars  lessh lessh hless hless
local originvars foreign mex foreign mex
local sharevars  phi1 phi3 phi2 phi4

forvalues pass = 1/4 {
    local skill  : word `pass' of `skillvars'
    local origin : word `pass' of `originvars'
    local share  : word `pass' of `sharevars'

    use "cps9415++.dta", clear
    replace wtsupp=wtsupp*peu
    if "`skill'"=="lessh" {
        generate lessh = (educ>=1 & educ<70)
    }
    else {
        generate hless = (educ>=1 & educ<=73)
    }

    collapse (rawsum) wtsupp, by(year `skill' `origin')
    egen t_hrs = total(wtsupp), by(year)
    generate `share' = wtsupp/t_hrs
    keep if `origin'==1 & `skill'==1

    if `pass'<4 {
        save "`pass'.dta", replace
    }
}

forvalues piece = 1/3 {
    merge 1:1 year using "`piece'.dta"
    drop _merge
}

* Plot against the year preceding the survey year.
replace year = year-1

twoway (line phi1 year, lpattern(vshortdash) lcolor(blue) ) ///
       (line phi2 year, lpattern(longdash) lcolor(red) ) ///
       (line phi3 year, lpattern(shortdash) lcolor(green) ) ///
       (line phi4 year, lpattern(line) lcolor(pink) ), ///
       legend(label(1 Foreign-born less than high school) label(2 Foreign-born high school and less) label(3 Mexican-born less than high school) label(4 Mexican-born high school and less) ring(10) position(6) col(2) size(vsmall) ) ///
       xtitle("Year") ytitle("") title("PEU-weighted share of low-skilled immigrants") ///
       xlab(1993(3)2015, nogrid) ylab(0(0.02)0.1, angle(h) nogrid) graphregion(color(white) )
graph export "6.pdf", as(pdf) replace

******************************************************************************************************************
******************************************************************************************************************
*The following code produces the left Panel of Figure 4. Again, we use CPS year 1994-2015.

* Left panel of Figure 4: share of total annual labor hours supplied by each
* low-skill immigrant group, by year. Hours are wtsupp x wkswork1 x uhrswork1.
* Every pass reloads the CPS file, applies identical sample restrictions, sums the
* hours weight by year x skill flag x origin flag and divides by the yearly total:
*   pass 1: lessh x foreign -> phi1, saved as 1.dta
*   pass 2: lessh x mex     -> phi3, saved as 2.dta
*   pass 3: hless x foreign -> phi2, saved as 3.dta
*   pass 4: hless x mex     -> phi4, kept in memory as the merge master
local skillvars  lessh lessh hless hless
local originvars foreign mex foreign mex
local sharevars  phi1 phi3 phi2 phi4

forvalues pass = 1/4 {
    local skill  : word `pass' of `skillvars'
    local origin : word `pass' of `originvars'
    local share  : word `pass' of `sharevars'

    use "cps9415.dta", clear
    drop if age<18
    drop if age>64
    drop if educ==0 | educ==1 | educ==999
    keep if uhrswork1<100 & uhrswork1>0
    drop if wtsupp<=0

    * Turn the person weight into weighted annual hours.
    replace wtsupp=wtsupp*wkswork1*uhrswork1

    * Origin flags: foreign born (bpl>14900 and citizen>1) and Mexican born (bpl==20000).
    gen foreign=0
    replace foreign=1 if bpl>14900 & citizen>1
    gen mex=0
    replace mex=1 if bpl==20000

    * Skill flag: lessh is educ below 70, hless is educ up to and including 73.
    if "`skill'"=="lessh" {
        gen lessh=1*(educ>=1 & educ<70)
    }
    else {
        gen hless=1*(educ>=1 & educ<=73)
    }

    collapse (rawsum)wtsupp, by (year `skill' `origin')
    egen t_hrs=sum(wtsupp), by(year)
    gen `share'=wtsupp/t_hrs
    keep if `origin'==1 & `skill'==1

    if `pass'<4 {
        save "`pass'.dta", replace
    }
}

* Bring the three saved series alongside phi4, one row per year.
forvalues piece = 1/3 {
    merge 1:1 year using "`piece'.dta"
    drop _m
}

* Plot against the year preceding the survey year.
replace year=year-1

* saving() now carries "replace": without it a second run of this do-file stops with
* "file 1l.gph already exists", unlike every other output here, which is overwritten.
twoway (line phi1 year, lpattern(vshortdash) lcolor(blue) ) ///
       (line phi2 year, lpattern(longdash) lcolor(red) ) ///
       (line phi3 year, lpattern(shortdash) lcolor(green) ) ///
       (line phi4 year, lpattern(line) lcolor(pink) ), ///
       legend(label(1 Foreign-born less than high school) label(2 Foreign-born high school and less) label(3 Mexican-born less than high school) label(4 Mexican-born high school and less) ring(10) position(6) col(2) size(vsmall) ) ///
       xtitle("Year") ytitle("") title("Labor hours share") ///
       xlab(1993(3)2015, nogrid) ylab(0(0.05)0.2, angle(h) nogrid) graphregion(color(white) ) ///
       saving(1l, replace)
graph export "2.pdf", as(pdf) replace

*The following code produces the right Panel of Figure 4. 
* Right panel of Figure 4: labor-hours shares as in the left panel, with each person's
* hours additionally multiplied by a productivity-equivalent-unit (PEU) factor.
* Intermediate files written: cps9415+.dta, x1.dta, x2.dta, peu.dta, cps9415++.dta, 1-3.dta.

* ---- Step 1: person-level file with schooling years, experience cells and hourly earnings ----
use "cps9415.dta", clear

* Years of schooling assigned from educ99.
gen years=9 if educ99<=6
replace years=10 if educ99==7
replace years=11 if educ99==8
replace years=12 if educ99==9 | educ99==10
replace years=13.5 if educ99==11
replace years=14 if educ99==12 | educ99==13 | educ99==14
replace years=16 if educ99==15
replace years=17.5 if educ99==16 | educ99==17
replace years=18 if educ99==18

drop if age<18
drop if age>64
drop if educ==0 | educ==1 | educ==999
keep if uhrswork1<100 & uhrswork1>0
drop if wtsupp<=0

* Potential experience: age - schooling - 6, capped at age - 16 and floored at zero.
gen exp=age-years-6
replace exp=age-16 if age-16<exp
replace exp=0 if exp<0
keep if exp>=0 & exp<=39

* Five-year experience cells 1..8. The cells are closed integer intervals, so a
* half-year value such as 4.5 (possible when years is 13.5 or 17.5) matches no cell,
* gets exper==0 and is removed by the drop below.
gen exper = 1*(exp>=0 & exp<=4) + 2*(exp>=5 & exp<=9)   + 3*(exp>=10 & exp<=14) + 4*(exp>=15 & exp<=19) + 5*(exp>=20 & exp<=24) + 6*(exp>=25 & exp<=29) + 7*(exp>=30 & exp<=34) + 8*(exp>=35 & exp<=39)
drop if exper==0

gen black=1*(race==200)

* Education cells: edu1 has four groups, edu2 merges the two lowest ones.
gen edu1=1* (educ>1 & educ<71)+2* (educ>70 & educ<=73) + 3*(educ>73 & educ<110) + 4*(educ>=111)
gen edu2=1* (educ>1 & educ<=73) + 2*(educ>73 & educ<110) + 3*(educ>=111)
drop if edu1==0
drop if edu2==0

gen foreign=0
replace foreign=1 if bpl>14900 & citizen>1
gen mex=0
replace mex=1 if bpl==20000

* Attach the yearly price index and compute earnings per hour deflated by pce.
merge m:1 year using "pce"
drop _m
gen earn = incwage/(wkswork1*uhrswork1*pce)
save "cps9415+.dta", replace

* ---- Step 2: mean hourly earnings by year x education x experience x sex x race ----
* Restricted to wkswork1>=35, uhrswork1 in 35..99, incwage below 9999990,
* and classwkr other than 10, 13 or 14 (inlist() replaces the obsolete "for any" prefix).
keep if wkswork1>=35
keep if uhrswork1>=35 & uhrswork1<=99
drop if incwage>=9999990
drop if inlist(classwkr, 10, 13, 14)

collapse (mean) earn [aw=wtsupp], by(year edu1 exper sex black)
save "x1.dta", replace

* ---- Step 3: reference earnings (years==12, sex==1, black==0, experience 8-12), weeks-weighted ----
use "cps9415+.dta", clear
keep if years==12 & sex==1 & black==0
keep if exp>=8 & exp<=12
replace wtsupp=wtsupp*wkswork1
collapse (mean) earn [aw=wtsupp], by(year)
ren earn earn_hs
sort year
save "x2.dta", replace

* ---- Step 4: PEU = cell earnings relative to the reference group, averaged over years ----
use "x1.dta", clear
sort year
merge m:1 year using "x2.dta"
drop _m
gen peu = earn/earn_hs
collapse (mean)peu, by(exper edu1 sex black)
save "peu.dta", replace

* ---- Step 5: attach the PEU factor to every person ----
use "cps9415+.dta", clear
merge m:1 exper edu1 sex black using "peu.dta"
drop _m
save "cps9415++.dta", replace

* ---- Step 6: PEU-weighted hours shares ----
*   pass 1: lessh x foreign -> phi1 (1.dta)    pass 2: lessh x mex -> phi3 (2.dta)
*   pass 3: hless x foreign -> phi2 (3.dta)    pass 4: hless x mex -> phi4 (in memory)
local skillvars  lessh lessh hless hless
local originvars foreign mex foreign mex
local sharevars  phi1 phi3 phi2 phi4

forvalues pass = 1/4 {
    local skill  : word `pass' of `skillvars'
    local origin : word `pass' of `originvars'
    local share  : word `pass' of `sharevars'

    use "cps9415++.dta", clear
    replace wtsupp=wtsupp*wkswork1*uhrswork1
    replace wtsupp=wtsupp*peu
    if "`skill'"=="lessh" {
        gen lessh=1*(educ>=1 & educ<70)
    }
    else {
        gen hless=1*(educ>=1 & educ<=73)
    }

    collapse (rawsum)wtsupp, by (year `skill' `origin')
    egen t_hrs=sum(wtsupp), by(year)
    gen `share'=wtsupp/t_hrs
    keep if `origin'==1 & `skill'==1

    if `pass'<4 {
        save "`pass'.dta", replace
    }
}

forvalues piece = 1/3 {
    merge 1:1 year using "`piece'.dta"
    drop _m
}

* Plot against the year preceding the survey year.
replace year=year-1

* saving() now carries "replace": without it a second run of this do-file stops with
* "file 6l.gph already exists", unlike every other output here, which is overwritten.
twoway (line phi1 year, lpattern(vshortdash) lcolor(blue) ) ///
       (line phi2 year, lpattern(longdash) lcolor(red) ) ///
       (line phi3 year, lpattern(shortdash) lcolor(green) ) ///
       (line phi4 year, lpattern(line) lcolor(pink) ), ///
       legend(label(1 Foreign-born less than high school) label(2 Foreign-born high school and less) label(3 Mexican-born less than high school) label(4 Mexican-born high school and less) ring(10) position(6) col(2) size(vsmall) ) ///
       xtitle("Year") ytitle("") title("PEU-weighted labor hour share") ///
       xlab(1993(3)2015, nogrid) ylab(0(0.05)0.2, angle(h) nogrid) graphregion(color(white) ) ///
       saving(6l, replace)

graph export "7.pdf", as(pdf) replace

******************************************************************************************************************
******************************************************************************************************************
*To produce Figure 5, rerun the code above that produces Figure 4, restricted to the border states.
*Specifically, apply the following restriction to the CPS sample (i.e. to cps9415.dta once it has been loaded).
* Border-state restriction for Figure 5: keep the five statecensus codes listed below.
* These two lines act on whatever dataset is in memory, so they need a dataset that
* still contains statecensus.
rename statecensus sta
keep if inlist(sta, 86, 93, 88, 85, 74)

******************************************************************************************************************
******************************************************************************************************************
*Figure 6 is produced for three years: 1990, 2000 and 2010. For each year, the plot is made by combining the US and Mexican censuses, separately for males and for females.
*The code below is for year 1990 and for males. It can be replicated to plot years 2000 and 2010, and also for females.

* Figure 6, one panel: log-wage density of men in the Mexican census, plotted with
* the census weight ("Non-immigrants") and with the weight scaled by the predicted
* odds of being in the US immigrant sample ("Immigrants counterfactual").
*
* Census year of the panel. Set to 2000 or 2010 for the other panels; the input files
* and the immigrant arrival window below all follow from this one value. (The original
* loaded the 1990 files but still filtered on yrimmig>=2000 and 2010-yrimmig, which
* leaves no immigrant observations.)
local yr 1990

* ---- Mexican census sample (im==0) ----
use "mex`yr'.dta", clear
keep if age>=21 & age<=65
drop if yrs<0 
drop if yrs>90

* Seven schooling groups from years of schooling.
gen edu1=1*(yrschool>=0 & yrschool<=4)
gen edu2=1*(yrschool>=5 & yrschool<=8)
gen edu3=1*(yrschool==9) 
gen edu4=1*(yrschool>=10 & yrschool<=11) 
gen edu5=1*(yrschool==12) 
gen edu6=1*(yrschool>=13 & yrschool<=15)
gen edu7=1*(yrschool>=16) 
gen mar=1*(marst==2)
gen agesq = age*age
replace agesq = agesq/100
*drop if inc>=99999998
* The keep list and the rename were fused into one invalid statement
* ("... hrswork1ren incearn incwage") in the original; they are two commands.
keep perwt sex incearn edu1 edu2 edu3 edu4 edu5 edu6 edu7 mar age agesq hrswork1
ren incearn incwage
gen im=0
save "1.dta", replace 

* ---- US census sample: Mexican-born arrivals of the preceding ten years (im==1) ----
local arrived_since = `yr' - 10
use "us`yr'.dta", clear
keep if age>=21 & age<=65
drop if educ==0 
keep if bpld==20000
keep if yrimmig>=`arrived_since'
* Age at entry = age minus years since immigration; keep those who arrived at 21 or older.
gen ageen=age-(`yr'-yrimmig)
keep if ageen>=21 
gen edu1=1*(educ==1)
gen edu2=1*(educ==2)
gen edu3=1*(educ==3) 
gen edu4=1*(educ==4 | educ==5) 
gen edu5=1*(educ==6) 
gen edu6=1*(educ>=7 & educ<=9)
gen edu7=1*(educ>=10 & educ<=11) 
gen mar=1*(marst==1 | marst==2)
gen agesq = age*age
replace agesq = agesq/100
keep perwt sex incwage edu1 edu2 edu3 edu4 edu5 edu6 edu7 mar age agesq
gen im=1
append using "1.dta"
erase "1.dta"
save "DFL_data.dta", replace

* CREATE INTERACTION TERMS;
* The three loops stay separate on purpose: the logit below selects int1-int7,
* int11-int17 and int21-int27 as variable ranges, which depend on creation order.

use "DFL_data.dta", clear
forvalues n=1(1)7{
gen int`n' = edu`n'*age
}
forvalues n=1(1)7{
gen int1`n' = edu`n'*agesq
}
forvalues n=1(1)7{
gen int2`n' = edu`n'*mar
}

gen int31 = mar*age
gen int32 = mar*agesq

* Men only.
keep if sex==1
gen lpar = 1*(incwage>0)

*logit lpar edu1 edu2 edu3 edu4 edu5 edu6 edu7 age agesq mar int1-int7 int11-int17 int21-int27 int31 int32 [pweight=perwt] if im==0 
*predict probmex, p
*logit lpar edu1 edu2 edu3 edu4 edu5 edu6 edu7 age agesq mar int1-int7 int11-int17 int21-int27 int31 int32 [pweight=perwt] if im==1 
*predict probus, p
*gen partwt = probus/probmex

* Probability of belonging to the immigrant sample given schooling, age and marital status.
logit im edu1 edu2 edu3 edu4 edu5 edu6 edu7 age agesq mar int1-int7 int11-int17 int21-int27 int31 int32 [pweight=perwt] 
predict probim, p
gen probstay=1-probim

* Wage = income divided by (hrswork1 x 4.5), in logs.
drop if incwage>9000000
replace incwage=incwage/(hrswork1*4.5)
*replace incwage=(incwage/(hrswork1*4.5))/2838.3572 
gen lnw=log(incwage)
drop if hrswork1>100

* Reweighting factor: odds of being an immigrant times the census weight
* (the participation adjustment partwt is switched off, i.e. set to 1).
gen partwt=1
gen rew = partwt*(probim/probstay)*perwt

* Only the Mexican-census observations are plotted; log wages are demeaned and trimmed to (-5, 5).
keep if im==0
egen x=mean(lnw)
replace lnw=lnw-x
keep if lnw<5 & lnw>-5
drop x
twoway(kdensity lnw if im==0 [aw = rew], width(.07) gauss  lcolor(red)  lpattern(line)) (kdensity lnw if im==0 [aw = perwt], width(.07) gauss lcolor(blue)  lpattern(dash)), legend(label(1 Immigrants counterfactual) label(2 Non-immigrants) ring(10) position(6) col(2) size(small)) xtitle("Log Wage") ytitle("Density") title("") xlab(-5(1)5) ylab(0(0.1)0.6, nogrid) graphregion(fcolor(white)) 
* NOTE(review): "6.pdf" is also the file name used for the right panel of Figure 3
* earlier in this do-file, and every year/sex variant of this panel writes to it as
* well - rename the output per panel if all figures are needed side by side.
graph export "6.pdf", as(pdf) replace

******************************************************************************************************************
******************************************************************************************************************
*Figures 6 and 7 are produced from the same datasets. The code below produces the top two plots of Figure 7, for males.
*The plots for females can be replicated by simply conditioning on the female sample instead.

* Figure 7 (top two plots, men): differences between reweighted log-wage densities of
* the 1990 Mexican-census sample, using immigrant-selection probabilities estimated
* separately for 1990, 2000 and 2010.

* ---- Mexican census samples (im==0), one temporary file per year ----
foreach yr in 2010 2000 1990 {
    use "mex`yr'.dta", clear
    keep if inrange(age, 21, 65)
    drop if yrs<0 | yrs>90

    * Seven schooling groups from years of schooling.
    generate edu1 = inrange(yrschool, 0, 4)
    generate edu2 = inrange(yrschool, 5, 8)
    generate edu3 = (yrschool==9)
    generate edu4 = inrange(yrschool, 10, 11)
    generate edu5 = (yrschool==12)
    generate edu6 = inrange(yrschool, 13, 15)
    generate edu7 = (yrschool>=16)
    generate mar = (marst==2)
    generate agesq = age*age/100

    * hrswork1 is carried along for 2000 and 1990 only; the 2010 extract is saved without it.
    local hours hrswork1
    if `yr'==2010 {
        local hours
    }
    keep perwt sex incearn edu1 edu2 edu3 edu4 edu5 edu6 edu7 mar age agesq `hours'
    rename incearn incwage
    generate im = 0
    generate year = `yr'
    save "m`yr'.dta", replace
}

* ---- US census samples (im==1): Mexican born, arrived within the preceding ten years ----
* 1990 and 2000 are written to disk; 2010 stays in memory and receives the appends.
foreach yr in 1990 2000 2010 {
    local arrived_since = `yr' - 10

    use "us`yr'.dta", clear
    keep if inrange(age, 21, 65)
    drop if educ==0
    keep if bpld==20000
    keep if yrimmig>=`arrived_since'

    generate edu1 = (educ==1)
    generate edu2 = (educ==2)
    generate edu3 = (educ==3)
    generate edu4 = inlist(educ, 4, 5)
    generate edu5 = (educ==6)
    generate edu6 = (educ>=7 & educ<=9)
    generate edu7 = (educ>=10 & educ<=11)
    generate mar = inlist(marst, 1, 2)
    generate agesq = age*age/100

    * Age at entry = age minus years since immigration; keep arrivals aged 21 or older.
    generate ageen = age-(`yr'-yrimmig)
    keep if ageen>=21

    keep perwt sex incwage edu1 edu2 edu3 edu4 edu5 edu6 edu7 mar age agesq
    generate im = 1
    generate year = `yr'
    if `yr'<2010 {
        save "u`yr'.dta", replace
    }
}

* Stack everything, remove the temporary files, keep the pooled file.
local pieces m2010 m2000 m1990 u2000 u1990
foreach piece of local pieces {
    append using "`piece'.dta"
}
foreach piece of local pieces {
    erase "`piece'.dta"
}
save "DFL_dif.dta", replace

* CREATE INTERACTIONS TERMS;
* Schooling dummies interacted with age (int1-int7), agesq (int11-int17) and mar
* (int21-int27). The creation order matters: the logits select these by variable range.
use "DFL_dif.dta", clear
local block = 0
foreach z in age agesq mar {
    local stub = cond(`block'==0, "int", "int`block'")
    forvalues n = 1/7 {
        generate `stub'`n' = edu`n'*`z'
    }
    local ++block
}
generate int31 = mar*age
generate int32 = mar*agesq

* Men only.
keep if sex==1
generate lpar = (incwage>0)

local rhs edu1 edu2 edu3 edu4 edu5 edu6 edu7 age agesq mar int1-int7 int11-int17 int21-int27 int31 int32

/*
Disabled participation adjustment (the partwt* factors below are fixed at 1 instead):
foreach yr in 1990 2000 2010 {
    logit lpar `rhs' [pweight=perwt] if im==0 & year==`yr'
    predict probmex`yr', p
}
foreach yr in 1990 2000 2010 {
    logit lpar `rhs' [pweight=perwt] if im==1 & year==`yr'
    predict probus`yr', p
}
gen partwtm1 = probmex2000/probmex1990
gen partwtm2 = probmex2010/probmex1990
gen partwtu0 = probus1990/probmex1990
gen partwtu1 = probus2000/probmex1990
gen partwtu2 = probus2010/probmex1990
*/

foreach tag in m1 m2 u0 u1 u2 {
    generate partwt`tag' = 1
}

* Year-specific logit of immigrant status; predictions are stored for every observation.
foreach yr in 1990 2000 2010 {
    logit im `rhs' [pweight=perwt] if year==`yr'
    predict probim`yr', p
    generate probstay`yr' = 1-probim`yr'
}

* Wage = income divided by (hrswork1 x 4.5), rescaled by 2838.3572, in logs.
keep if incwage>0
drop if incwage>9000000
replace incwage=(incwage/(hrswork1*4.5))/2838.3572
generate lnw = log(incwage)
drop if hrswork1>100

* Reweighting factors, all relative to the 1990 probability of staying.
generate rewm1 = partwtm1*(probstay2000/probstay1990)*perwt
generate rewm2 = partwtm2*(probstay2010/probstay1990)*perwt
generate rewu0 = partwtu0*(probim1990/probstay1990)*perwt
generate rewu1 = partwtu1*(probim2000/probstay1990)*perwt
generate rewu2 = partwtu2*(probim2010/probstay1990)*perwt

* Densities are evaluated on the 1990 Mexican-census observations only;
* log wages are demeaned and trimmed to (-5, 5).
keep if im==0 & year==1990
egen x = mean(lnw) if lnw>-15 & lnw<15
replace lnw = lnw-x
keep if lnw>-5 & lnw<5

* Six kernel densities of the same variable under different weights. x is dropped
* before each call so that generate() can recreate it; the last copy is kept for plotting.
local weights   perwt rewm1 rewm2 rewu0 rewu1 rewu2
local densities dm0 dm1 dm2 du0 du1 du2
forvalues k = 1/6 {
    local w : word `k' of `weights'
    local d : word `k' of `densities'
    drop x
    kdensity lnw [aw=`w'], width(.07) gauss generate(x `d')
}

* Density gaps for 1990, 2000 and 2010.
generate m0 = du0-dm0
generate m1 = du1-dm1
generate m2 = du2-dm2

twoway(line m0 x, lcolor(blue) lpattern(line)) (line m1 x, lcolor(green) lpattern(longdash)) (line m2 x, lcolor(red) lpattern(shortdash)), ///
    legend(label(1 1990) label(2 2000) label(3 2010) ring(10) position(6) col(3) ) ///
    xtitle("Log Wage") ytitle("Density") xlab(-5(1)5) ylab(-0.15(0.05)0.15, angle(h)  nogrid) graphregion(fcolor(white))
graph export "7a.pdf", as(pdf) replace

* Changes in the gaps over time (ddm0 is computed but not plotted).
generate ddm0 = m1-m0
generate ddm1 = m2-m0
generate ddm2 = m2-m1
twoway(line ddm1 x, lcolor(black) lpattern(line)) (line ddm2 x, lcolor(pink) lpattern(longdash)), ///
    legend(label(1 2010 minus 1990) label(2 2010 minus 2000) ring(10) position(6) col(3) ) ///
    xtitle("Log Wage") ytitle("Density") xlab(-5(1)5) ylab(-0.15(0.05)0.15, angle(h) nogrid) graphregion(fcolor(white))
graph export "7b.pdf", as(pdf) replace

******************************************************************************************************************
******************************************************************************************************************
* The following code produces Figure 8 of this paper. The analysis is based on data from the UNU-WIDER World Income Inequality Database and the World Bank World Development Indicators.
*We assume a log-normal distribution around GDP/K (GDP_PC in the code) to obtain its percentile values. Details are described in footnote 14 of this paper.

* Build Gini1.dta: one Gini value per country-year from the WIID file.
use "wiid_ver3_sept15.dta", clear
keep if Countrycode2=="MX"|Countrycode2=="US"|Countrycode2=="SV"| ///
Countrycode2=="GT"|Countrycode2=="DO"|Countrycode2=="JM"| ///
Countrycode2=="CO"|Countrycode2=="HN"|Countrycode2=="EC"
format %16s Country
format %26s Source
keep Country Year Gini Quality Source
drop if Gini==.
drop if Year<1950
sort Country Year
egen Country_code=group(Country)

* Country-year key and original row order. Stored as long: the default float
* type cannot hold every integer above 16,777,216 exactly, which the previous
* derived key (id*100 + count) exceeded.
gen long id=Year*100+Country_code
gen long n=_n
order n

* Within each country-year keep only rows with the lowest Quality code,
* average their Gini values, and then keep a single row: the one with the
* smallest Gini, ties broken by original row order.
bysort id: egen double best_quality=min(Quality)
drop if Quality>best_quality
bysort id: egen double Gini_average=mean(Gini)
bysort id: egen double lowest_gini=min(Gini)
drop if Gini!=lowest_gini
bysort id (n): keep if _n==1

keep Country Year Gini_average
save "Gini1.dta",replace

* Figure 8: ratio of the origin-country 50th income percentile to the US 25th percentile.
use "Master.dta", clear
merge 1:1 Country Year using Gini1
format %16s Country
egen Country_code=group(Country)

* Linear interpolation of the Gini within country. bysort is used because the
* data are not guaranteed to be sorted by Country after the merge (rows found
* only in the using file are appended at the end), and "by Country:" alone
* stops with a "not sorted" error in that case.
bysort Country (Year): ipolate Gini_average Year, generate(Gini)
drop if Year<1990
drop Gini_average _merge
xtset Country_code Year

* Carry the last available Gini forward into later years with no value, and
* record in Last_Gini the year before the first carried-forward observation.
gen double Gini2=Gini
replace Gini2=l.Gini2 if Gini2==.
gen Last_Gini=Year if Gini2!=Gini
by Country_code, sort: egen Last_Gini2=min(Last_Gini)
gen Last_Gini3=Last_Gini2-1
drop Last_Gini Last_Gini2 Gini
rename Gini2 Gini
rename Last_Gini3 Last_Gini
save "Gini2.dta",replace

* Log-normal assumption: the Gini pins down the standard deviation of log
* income, and percentiles follow from GDP_PC treated as the mean.
gen St_Dev=(2^(.5))*invnormal((Gini/100+1)/2)
gen x_25=GDP_PC*exp(St_Dev*invnormal(0.25)-((St_Dev)^2)/2)
gen x_50=GDP_PC*exp(St_Dev*invnormal(0.50)-((St_Dev)^2)/2)
drop GDP_PC Gini St_Dev Country_code

* Two-letter codes become the variable suffixes created by reshape.
replace Country="MX" if Country=="Mexico"
replace Country="US" if Country=="United States"
replace Country="SV" if Country=="El Salvador"
replace Country="GT" if Country=="Guatemala"
replace Country="DO" if Country=="Dominican Republic"
replace Country="JM" if Country=="Jamaica"
replace Country="CO" if Country=="Colombia"
replace Country="HN" if Country=="Honduras"
replace Country="EC" if Country=="Ecuador"

reshape wide x_25 x_50 Last_Gini, i(Year) j(Country) string

*Mexico vs. USA
gen ratio50_25_MX=x_50MX/x_25US

*Set of Latin American countries vs. USA (fixed country weights; Weight1 is their sum)
gen W_SV=0.272919
gen W_DO=0.186263
gen W_GT=0.154663
gen W_JM=0.116669
gen W_CO=0.108165
gen W_HN=0.087478
gen W_EC=0.073842
gen Weight1=W_SV+W_DO+W_GT+W_JM+W_CO+W_HN+W_EC

gen x_50LA=(W_SV*x_50SV) + (W_DO*x_50DO) + (W_GT*x_50GT) + (W_JM*x_50JM) + (W_CO*x_50CO) + (W_HN*x_50HN) + (W_EC*x_50EC)
gen ratio50_25_LA=x_50LA/x_25US

* Quick-look plots (not exported).
line ratio50_25_MX Year, ytitle("Ratio of 50{superscript:th}/25{superscript:th} percentile of income")
line ratio50_25_LA Year, ytitle("Ratio of 50{superscript:th}/25{superscript:th} percentile of income")

twoway (line ratio50_25_MX Year,  lp(solid) lc(blue) lwidth(thick) ), legend(off) xtitle("Year") ytitle("Mexico Income/US Income") title("") xlab(1990(5)2015, nogrid) ylab(0.4(0.1)0.6, angle(h) nogrid) graphregion(color(white) )
graph export "ratio_mx.pdf", as(pdf) replace

twoway (line ratio50_25_LA Year,  lp(solid) lc(blue) lwidth(thick) ), legend(off) xtitle("Year") ytitle("Latin American Income/US Income") title("") xlab(1990(5)2015, nogrid) ylab(0.2(0.1)0.4, angle(h) nogrid) graphregion(color(white) )
graph export "ratio_la.pdf", as(pdf) replace

******************************************************************************************************************
******************************************************************************************************************
* The following code produces Figure 9 of this paper. We collect GDP data from World Bank, World Development Indicators. 

* Figure 9 (Mexico vs. US): rolling standard deviation of annualized quarterly GDP growth.
* ", clear" is required: the previous section leaves modified data in memory,
* and a bare "use" then aborts with r(4).
use "GDP.dta", clear
gen date2=yq(year,quarter)
format date2 %tq
* date is a plain running index used as the time variable and merge key.
* NOTE(review): this assumes GDP.dta is already in chronological order - confirm.
gen date=_n
order date date2
tsset date

* Annualized quarter-on-quarter growth, in percent.
foreach string in "MX" "US" "SV" "DO" "GT" "JM" "CO" "HN" "EC" {
gen double gdpg_`string'=100*(((gdp_`string'/l.gdp_`string')^4)-1)
}
save "GDP1.dta", replace

**USA vs Mex
* The first period has no lagged GDP and therefore no growth rate.
drop if date==1
keep date date2 year quarter gdp_MX gdp_US gdpg_MX gdpg_US
save "GDP1_MX.dta",replace

* For each country: 8-period rolling sd of growth, saved to its own file and
* merged back into GDP1_MX.dta on date.
foreach string in "MX" "US" {
rolling sd_gdp_`string' =r(sd), window(8) keep(date) saving(sd_gdp_`string' , replace): sum gdpg_`string'
use sd_gdp_`string', clear
drop start end
save sd_gdp_`string', replace
clear
use "GDP1_MX.dta"
merge 1:1 date using "sd_gdp_`string'.dta"
drop _merge
save "GDP1_MX.dta", replace
}

label variable sd_gdp_MX "Mexico"
label variable sd_gdp_US "United States"
label variable date2 "Date"
*drop if date<9
sum sd_gdp_MX sd_gdp_US
line sd_gdp_MX sd_gdp_US date2, title("Standard Dev. of GDP Growth") lwidth(thick thick) lcolor(blue red)

**USA vs Latin America
* Start from the growth-rate file, discard Mexico and all GDP levels, and keep
* periods from index 42 onward, re-based so that the first kept period is 2.
use "GDP1.dta", clear
scalar define cut=42
drop gdpg_MX gdp_*
drop if date<cut
replace date=date-cut+2
save "GDP1_LA.dta",replace

* Per country: 8-period rolling sd of growth, written to sd_gdp_<code>.dta and
* merged back into GDP1_LA.dta on date.
foreach cc in "US" "SV" "DO" "GT" "JM" "CO" "HN" "EC" {
	rolling sd_gdp_`cc' =r(sd), window(8) keep(date) saving(sd_gdp_`cc' , replace): sum gdpg_`cc'

	use sd_gdp_`cc', clear
	drop start end
	save sd_gdp_`cc', replace

	clear
	use "GDP1_LA.dta"
	merge 1:1 date using "sd_gdp_`cc'.dta"
	drop _merge
	save "GDP1_LA.dta", replace
}

* Fixed country weights for the Latin American aggregate; Weight1 holds their sum.
gen W_SV=0.272919
gen W_DO=0.186263
gen W_GT=0.154663
gen W_JM=0.116669
gen W_CO=0.108165
gen W_HN=0.087478
gen W_EC=0.073842
gen Weight1=W_SV+W_DO+W_GT+W_JM+W_CO+W_HN+W_EC

* Weighted average of the country-level rolling standard deviations.
gen sd_gdp_LA=(W_SV*sd_gdp_SV)+(W_DO*sd_gdp_DO)+(W_GT*sd_gdp_GT)+(W_JM*sd_gdp_JM)+(W_CO*sd_gdp_CO)+(W_HN*sd_gdp_HN)+(W_EC*sd_gdp_EC)

label variable date2 "Date"
label variable sd_gdp_LA "Latin America"
label variable sd_gdp_US "United States"

line sd_gdp_LA sd_gdp_US date2, title("Standard Dev. of GDP Growth") lwidth(thick thick) lcolor(blue red)

******************************************************************************************************************
******************************************************************************************************************
* The following code produces Figure 10 of this paper.

* Figure 10: number of officers, Southwest vs. nationwide.
* ", clear" is required because the previous section leaves modified data in memory.
use "BP_data", clear
format southwest nationwide %15.0fc
label variable southwest "Southwest"
label variable nationwide "Nationwide"

twoway (line southwest year, lpattern(vshortdash) lcolor(blue) ) (line nationwide year, lpattern(longdash) lcolor(red) ), legend(label(1 Southwestern States) label(2 Nationwide) ring(10) position(6) col(2) size(medium) ) xtitle("Year") ytitle("No. of officers") title("") xlab(1992(4)2016, nogrid) ylab(0(5000)25000, angle(h) nogrid) graphregion(color(white) )
graph export "10.pdf", as(pdf) replace

******************************************************************************************************************
******************************************************************************************************************
* The following code produces Figures 11, 12, and 14-17 of this paper.
* The dataset 1980-2015.dta is taken from Hanson & McIntosh (2016). It covers 27 immigrant-sending countries in the years 1980, 1990, 2000, 2010 and 2015, by birth cohort and gender. It has variables on population, GDP, and the number of immigrants in each year.
* The dataset 2020-2050.dta is also taken from Hanson & McIntosh (2016). It has the same structure as 1980-2015.dta, but its variables are projected values of birth cohort size and GDP per capita for 2020-2050.
* The dataset US_gdp_updated_redux.dta has yearly GDP data for the US and Mexico from 1991-2016.
 
#delimit;
clear all;
set memory 250m;
set more off;

/* Stack the observed (1980-2015) and projected (2020-2050) cohort files and attach yearly GDP. */
use "1980-2015.dta", clear;
append using "2020-2050.dta";
merge m:1 year using "US_gdp_updated_redux.dta";
drop _merge;

/* Map each five-year birth bracket "YYYY-YYYY" (1901-1905 through 2046-2050)
   to its middle year. Any other bracket string is left missing. */
gen birth_midpoint = .;
forvalues first = 1901(5)2046 {;
	local last = `first' + 4;
	replace birth_midpoint = `first' + 2 if birth=="`first'-`last'";
	};
rename birth birth_bracket;
rename birth_midpoint birth_cohort;

gen age = year - birth_cohort;

drop if pop==.;

/* Identifiers: country x cohort x sex cells, and cohort x sex groups pooled over countries. */
egen cohort_id = group(country birth_cohort sex);
egen cohort_group_id = group(birth_cohort sex);

/* Migrants as a percentage of the base population of the cell. */
gen migperc = 100 * perwt/base_pop;

/* Change in the migration percentage since the cell's previous observation, and its lag. */
bysort cohort_id (year): gen dmigperc = migperc - migperc[_n-1];
bysort cohort_id (year): gen ldmigperc = dmigperc[_n-1];

/*making migration rates decadal: the 2015 change is doubled*/
replace dmigperc = dmigperc*2 if year==2015;

/* Country names without blanks, so they can later be used as variable names. */
replace country = "USA" if country=="United States of America";
replace country = "ElSalvador" if country=="El Salvador";
replace country = "DominicanRepublic" if country=="Dominican Republic";

rename perwt migrants;
rename base_pop births;

/* Attach the USA value of each measure to every row of the same birth cohort and sex. */
foreach v in births gdp gdp17 {;
	gen `v'1 = `v' if country=="USA";
	egen dest`v' = mean(`v'1), by(birth_cohort sex);
	drop `v'1;
	};

gen female = (sex=="female");

/* Origin/USA ratios and their logs. */
gen brat = births/destbirths;
gen gdprat = gdp/destgdp;
gen gdp17rat = gdp17/destgdp17;
gen lbrat = ln(brat);
gen lgdprat = ln(gdprat);
gen lgdp17rat = ln(gdp17rat);

/*cross-cohort change in lbrat: one row per cohort cell, differenced against
  the preceding birth cohort of the same country and sex, saved to "temp" and
  merged back onto the full data*/
preserve;
collapse lbrat  female birth_cohort (first) country, by(cohort_id);
bysort country female (birth_cohort): gen dlbrat = lbrat - lbrat[_n-1];
drop lbrat;
save "temp", replace;
restore;
merge m:1 country female birth_cohort using "temp";

/*the destination country and Cuba are excluded from here on*/
drop if inlist(country, "USA", "Cuba");
/* Numeric country code (alphabetical order of the country strings). */
encode country, gen(cnum);

gen young = (age<=40);

/* Period-specific year terms: year1 carries the year up to 1990 and is 0 from
   2000 on; year2 is the mirror image. */
gen year1 = year if (year<=1990);
replace year1 = 0 if (year>=2000 & year!=.);
gen year2 = year if (year>=2000 & year!=.);
replace year2 = 0 if (year<=1990);

gen yeardum1 = (year<=1990);
gen yeardum2 = (year>=2000);

gen yr = year-1980;
gen yrsq = yr^2;

/* Interactions of each regressor with the young indicator. */
foreach v in year1 year2 yeardum1 yeardum2 yr yrsq lgdprat lgdp17rat lbrat female {;
	gen young_`v' = young * `v';
	};

/* Country-specific terms for country codes 2 to 25: each variable equals the
   named regressor for rows of that country and 0 for all other rows. */
forvalues c = 2/25 {;
	gen cnumyy_young_`c' = (cnum==`c' & young==1);
	gen cnumx_lbrat_`c' = cond(cnum==`c', lbrat, 0);

	gen cnumz_year_`c' = cond(cnum==`c', yr, 0);
	gen young_cnumz_year_`c' = young * cnumz_year_`c';

	gen cnums_yearsq_`c' = cond(cnum==`c', yrsq, 0);

	gen cnumzz_year1_`c' = cond(cnum==`c', year1, 0);
	gen young_cnumzz_year1_`c' = young * cnumzz_year1_`c';

	gen cnumzz_year2_`c' = cond(cnum==`c', year2, 0);
	gen young_cnumzz_year2_`c' = young * cnumzz_year2_`c';

	gen cyeardum2_`c' = (cnum==`c' & yeardum2==1);
	gen young_cyeardum2_`c' = young * cyeardum2_`c';

	gen cnumgg_gdp_`c' = cond(cnum==`c', lgdprat, 0);
	};

/* Age-specific terms for each five-year age midpoint, built the same way. */
foreach a in 17 22 27 32 37 42 47 52 57 62 67 72 77 {;
	gen agex_`a'_lbrat = cond(age==`a', lbrat, 0);
	gen ageyearx_`a' = cond(age==`a', yr, 0);
	gen ageyearsqx_`a' = cond(age==`a', yrsq, 0);
	gen agezz_year1_`a' = cond(age==`a', year1, 0);
	gen agezz_year2_`a' = cond(age==`a', year2, 0);
	gen agegg_gdp_`a' = cond(age==`a', lgdprat, 0);
	};

keep if age<=70;

/* Prediction equation for the migration percentage, weighted by births and
   clustered on birth-cohort x sex groups. */
local rhs lbrat young_lbrat lgdprat young_lgdprat female young_female i.age i.cnum
	cnumyy_young_*  cyeardum2* young_cyeardum2*
	cnumzz_year1* young_cnumzz_year1* cnumzz_year2*  young_cnumzz_year2*;
reg migperc `rhs' [aw = births] , cluster(cohort_group_id);
outreg2 using "mex0", ctitle("prediction eq") nocons  nolabel bdec(4)  replace;
predict migperc_hat;

/* Through 2015 the observed rate is used in place of the fitted value, then
   rates are converted into numbers of migrants. */
replace migperc_hat= migperc if year<=2015;
gen number_hat = (migperc_hat/100) * births;

/* 2050 is not used below. */
drop if year==2050;

/*TABLE 6: THOUSANDS OF FOREIGN-BORN INDIVIDUALS RESIDENT IN THE US, BY YEAR:*/
/*Table of predicted foreign-born migrant counts by year and age:*/
#delimit;
/*These two exclusions are left outside the preserve so that they continue to
  apply to everything that follows, as they did before.*/
drop if country=="Cuba";
drop if country=="Venezuela";

/*The table is built on a preserved copy. Without preserve/restore the
  keep/collapse/reshape below permanently replaced the cohort-level data, and
  the figures that follow (which need year, age and cnum) could not run.*/
preserve;
keep if (year==1980 | year==2015 | year==2040);
*gen young = (age<40);
gen old = (age>40 & age!=.);
replace number_hat = number_hat/1000;
collapse (sum) number_hat, by(country year old);
sort number_hat;
/*wide by year, then by age group: number_hat<year><old>, with old=0 for age<=40*/
reshape wide number_hat, i(country old) j(year);
reshape wide number_hat*, i(country) j(old);
order country number_hat19800	number_hat19801	number_hat20150	number_hat20151	number_hat20400	number_hat20401;
/*display the table before the cohort-level data are restored*/
list;
restore;


/*FIGURE 17: smoothed age profile of the Mexican-born in the US in 1980, 2015 and 2040 (predicted)*/
#delimit;
preserve;
keep if country=="Mexico";
keep if inlist(year, 1980, 2015, 2040);
drop if age>70;

/*total predicted migrants per age midpoint and year, in thousands*/
collapse (sum) number_hat, by(cnum age year);
replace number_hat = number_hat/1000;

/*one lowess-smoothed series per year*/
foreach yr in 1980 2015 2040 {;
	lowess number_hat age if year==`yr', nograph gen(p_`yr');
	label var p_`yr' "`yr'";
	};
label var p_2040 "2040, predicted";

sort age;
tw line p_1980 age || line p_2015 age || line p_2040 age, lpattern(dash)
	ytitle("Thousands per 5-year age cohort")
	xtitle("Midpoint of 5-year Age Cohort")
	xlabel(0 "0" 10 "10" 20 "20" 30 "30" 40 "40" 50 "50" 60 "60" 70 "70")
	title("Age Frequencies of Mexican-born in US, by year")
	saving("migration_age_freq", replace);
restore;

/*Pictures of predicted future migration rates by census year and origin country.
  Section headers here use block comments on purpose: under "#delimit ;" a
  comment that starts with * runs until the next semicolon, so a * header placed
  directly above a command turns that command into part of the comment.*/
#delimit;
preserve;
keep if age>=15 & age<=40;
keep if (country=="Mexico" | country=="Guatemala" | country=="Honduras" |  country=="ElSalvador");
/*NOTE(review): the aweights also apply to the (sum) of number_hat, so the
  "stock" is a births-weighted sum. If an unweighted total is intended,
  (rawsum) would be the statistic to use - confirm before changing.*/
collapse migperc_hat  (sum) number_hat [aw=births], by(year country);
replace number_hat = number_hat/1000000;
sort year;
/*one rate series and one count series per country*/
foreach x in Mexico Guatemala Honduras ElSalvador {;
	gen `x' = migperc_hat if country=="`x'";
	gen `x'_n = number_hat if country=="`x'";
	label var `x'_n "`x'";
	};

/*FIGURE 15:  ACTUAL AND PREDICTED MIGRATION RATES BY ORIGIN.
  Previously this graph command was swallowed by its own * header comment and
  never ran. dash_dot is the valid name of the pattern (dashdot is not).*/
line Mexico year , lwidth(thick)|| line Guatemala year , lpattern(dash) || line Honduras year , lpattern(dot) || line ElSalvador year, lpattern(dash_dot)
	ytitle("Migration rate for cohorts aged 15-40") xtitle("Year") title("Actual and Predicted Migration Rates by Origin") xline(2017.5)
	xlabel(1980 "1980" 1990 "1990" 2000 "2000" 2010 "2010" 2015 "15" 2020 "2020" 2030 "2030" 2040 "2040") yline(0)
	text(2 1995 "Actual", place(e)) text(2 2030 "Predicted", place(e)) saving("migration_rates", replace);

/*FIGURE 16:  STOCK OF FOREIGN-BORN MIGRANTS.*/
line Mexico_n year , lwidth(thick)|| line Guatemala_n year , lpattern(dash) || line Honduras_n year , lpattern(dot) || line ElSalvador_n year, lpattern(dash_dot)
	ytitle("Millions of migrants aged 15-40") xtitle("Year") title("Stock of Foreign-born Migrants aged 15-40 by Origin")
	xlabel(1980 "1980" 1990 "1990" 2000 "2000" 2010 "2010" 2015 "15" 2020 "2020" 2030 "2030" 2040 "2040") yline(0) xline(2017.5)
	text(2 1995 "Actual", place(e)) text(2 2030 "Predicted", place(e)) saving("migration_stocks", replace);
restore;

/*total number of people in the US by origin country and year (thousands),
  separately for young (age<=40) and older cohorts*/
#delimit;
/*Built on a preserved copy: the collapse/reshape would otherwise replace the
  cohort-level data that the following figures still need (age, year, births).*/
preserve;
keep if (year==1980 | year==2015 | year==2040);
collapse (sum) number_hat, by(year country young);
gen migrants =number_hat/1000;
drop number_hat;
reshape wide  migrants, i(country young) j(year);
/*display the table before the cohort-level data are restored*/
list;
restore;

/*FIGURE 11:  Table of home cohort and migrant counts by year.
  Headers in this part use block comments: under "#delimit ;" a * comment runs
  to the next semicolon and would absorb the command written below it.*/
#delimit;
preserve;
keep if age>=15 & age<=40;
keep if year<=2015;
drop if country=="Cuba";
collapse (sum) births migrants, by(year country);
replace births = births/1000;
replace migrants = migrants/1000;
reshape wide births migrants, i(country) j(year);
/*percentage changes 1980-2015 in cohort size and in the number of migrants*/
gen birthchange = 100*(births2015 - births1980) / births1980;
gen migchange = 100*(migrants2015 - migrants1980) / migrants1980;
regr migchange birthchange  [aw=births1980];
predict mighat;
label var mighat "Predicted Values";
tw scatter migchange birthchange , xtitle("% change in birth cohort size, 1980-2015") ytitle("% change in migration rate, 1980-2015") mlabel(country) mlabangle(39)
	title("US Migration Changes on Labor Supply Changes, LAC");
sort country;
/*Return to the cohort-level data. This restore was missing, so the next step
  ran on the country-level wide table, where age and year do not exist.*/
restore;

/*Pictures of observed historical migration rates by census year and origin country:*/
preserve;
keep if age>=15 & age<=40;
keep if (country=="Mexico" | country=="Guatemala" | country=="Honduras" |  country=="ElSalvador"| country=="DominicanRepublic" | country=="Colombia");
/*NOTE(review): the aweights also apply to the (sum) of migrants and births,
  so the counts are births-weighted sums. (rawsum) would give unweighted
  totals - confirm which is intended before changing.*/
collapse  migperc destbirths (sum) migrants births [aw=births], by(year country);
sort year;
replace migrants = migrants/1000000;
/*per country: rate, count, cohort size, origin/US cohort ratio, and that
  ratio normalized to its 1980 value*/
foreach x in Mexico Guatemala Honduras ElSalvador DominicanRepublic Colombia {;
	gen `x' = migperc if country=="`x'";
	gen `x'_n = migrants if country=="`x'";
	gen `x'_b = births if country=="`x'";
	gen `x'_rat = `x'_b/destbirths if country=="`x'";
	gen `x'_bs = `x'_rat if year==1980;
	egen `x'_bsm = mean(`x'_bs), by(country);
	gen `x'_ratn= `x'_rat/`x'_bsm;
	label var `x'_n "`x'";
	label var `x'_ratn "`x'";
	};

/*FIGURE 12:  MIGRATION RATES VERSUS COUNTS.
  The rates panel was previously absorbed by its * header comment, so graph1
  was never written before grc1leg tried to read it.*/
line Mexico year , lwidth(thick)|| line Guatemala year , lpattern(dash) || line Honduras year , lpattern(dot) || line ElSalvador year, lpattern(dash_dot) || line DominicanRepublic year, lpattern(shortdash)
	ytitle("Average migration rate aged 15-40") xtitle("Year") title("Migration rates")
	xlabel(1980 "1980" 1990 "1990" 2000 "2000" 2010 "2010" 2015 "2015") saving(graph1, replace);
line Mexico_n year , lwidth(thick)|| line Guatemala_n year , lpattern(dash) || line Honduras_n year , lpattern(dot) || line ElSalvador_n year, lpattern(dash_dot)  || line DominicanRepublic_n year, lpattern(shortdash)
	ytitle("Millions of migrants aged 15-40") xtitle("Year") title("Migrant counts")
	xlabel(1980 "1980" 1990 "1990" 2000 "2000" 2010 "2010" 2015 "2015")   saving(graph2, replace);
/*grc1leg is a user-written command and must be installed*/
grc1leg  graph1.gph graph2.gph, row(1) legendfrom(graph1.gph) title("Migration Rates versus Counts, ages 15-40");

/*FIGURE 14:  RELATIVE LABOR SUPPLY RATIOS BY ORIGIN COUNTRY:*/
line Mexico_ratn year , lwidth(thick)|| line Guatemala_ratn year , lpattern(dash) || line Honduras_ratn year , lpattern(dot)
	|| line ElSalvador_ratn year, lpattern(dash_dot)
	ytitle("Change in Origin/US labor supply ratios") xtitle("Year") title("Relative Labor Supply Ratios by Origin Country")
	xlabel(1980 "1980" 1990 "1990" 2000 "2000" 2010 "2010" 2015 "15" 2020 "2020" 2030 "2030" 2040 "2040" 2050 "2050") xline(2017.5)
	note("Average ratio of Origin/US birth cohort size for all native born ages 15-40, 1980=1");
restore;

******************************************************************************************************************
******************************************************************************************************************
* The following code produces Figure 13 of this paper.
* The dataset US_gdp_updated.dta has quarterly GDP data for the US, Mexico, and other Latin American countries from year 1991-2016.  

/*
#delimit;
clear;
set matsize 800;
set memory 200m;
set more off;
global effort "lwapp tapp bapp1 bapp2 bapp3 bapp4 bapp5 bapp6 bapp7 lwenf lwenf_full prdayhours nprdayhours total_program_officer_hours 
	border_enforcement_hours patrol_border_hours patrol_interior_hours";
	
/*new ppp gdp data:*/
use "US_gdp_updated.dta", replace;
collapse (mean) gdp*, by(year);
keep gdp_us gdp_mx year;
save "US_gdp_updated_redux.dta", replace;	

/*Prepping the enforcement effort data:*/
/*Annualized cumulates by sector:*/
use "catch_dhs_brookings.dta", clear;

/*These variables have zeros where they should be missing in some years:*/
foreach x in lwenf lwenf_full prdayhours nprdayhours total_program_officer_hours border_enforcement_hours patrol_border_hours patrol_interior_hours{;
	replace `x' = . if `x'==0;
	};
collapse $effort , by(sector year);

/*these are the averages by month; annualize by multiplying by 12 (this is preferable to summing above because it preserves missings where there is no data):*/
foreach x in $effort {;
	replace `x' = `x' * 12;
	};

/*making a single effort index variable that tracks the available data in as many years as possible:*/
gen overlap = (year==2004);
foreach x in lwenf_full nprdayhours {;
	egen `x'_om1 = mean(`x') if overlap==1;
	egen `x'_om = mean(`x'_om1);
	drop `x'_om1; 
	};
gen index = lwenf_full;
replace index = nprdayhours * (lwenf_full_om/nprdayhours_om) if lwenf_full==.;

/*Normalize effort so that you have the predicted number of apprehensions in the base year (99-00) equal the observed mean in those years.  
With this adjustment, the product of the enforcement and the number of apprehensions can be taken directly as a prediction of apprehensions:*/
gen base = (year==1999 | year==2000);
foreach x in index {;
	egen `x'_bm1 = mean(`x') if base==1;
	egen `x'_bm = mean(`x'_bm1);
	gen `x'_norm = `x'/`x'_bm;
	drop `x'_bm1; 
	};

drop if index_norm==.;

save "catch_dhs_brookings_redux.dta", replace;

/*Prepping the apprehensions data (calculating average number of apprehensions in early data per age/gender/state_of_birth/sector:*/
use "apps_by_sob.dta", clear;

/*keeping only the earliest years to avoid later endogeneity:*/
keep if (fy=="99" | fy=="00");
destring state_of_birth_cd, force replace;
/*generating age cells to match the REStat data*/
drop if age<=16;
drop if age>50;
replace age = 17.5 if age>=17 & age<=18;
replace age = 20 if age>=19 & age<=21;
replace age = 22.5 if age>=22 & age<=23;
replace age = 25 if age>=24 & age<=26;
replace age = 27.5 if age>=27 & age<=28;
replace age = 30 if age>=29 & age<=31;
replace age = 32.5 if age>=32 & age<=33;
replace age = 35 if age>=34 & age<=36;
replace age = 37.5 if age>=37 & age<=38;
replace age = 40 if age>=39 & age<=41;
replace age = 42.5 if age>=42 & age<=43;
replace age = 45 if age>=44 & age<=46;
replace age = 47.5 if age>=47 & age<=48;
replace age = 49.5 if age>=49 & age<=50;
collapse apprehensions (first) state_of_birth  sector_nm , by(age gender state_of_birth_cd sector);
egen id = group(age  gender  state_of_birth_cd  sector);

/*expanding this cross-sectional dataset into panel long form by duplicating it for every year so it can be merged to the enforcement data by sector/year:*/
foreach x in 1977	1978	1979	1980	1981	1982	1983	1984	1985	1986	1987	1988	1989	1990	1991	1992	1993	
		1994	1995	1996	1997	1998	1999	2000	2001	2002	2003	2004	2005	2006	2007	2008  {;
	gen yr`x' = .;
	};	
reshape long yr , i(id) j(year);
drop yr;

/*merging to the annualized (redux) enforcement effort data at the sector/year level:*/
merge m:1 sector year using "catch_dhs_brookings_redux.dta";

/*Generating the panel variable to be used as an enforcement effort control:*/
gen effort_index = index_norm * apprehensions;
label var effort_index "Predicted number of apprehensions per sex/age/statemx/sector";

/*Renaming to align to ReStat data:*/
gen statemx=.;
replace statemx = 1 if state_of_birth=="AGS";
replace statemx = 2 if state_of_birth=="BCN";
replace statemx = 3 if state_of_birth=="BCS";
replace statemx = 4 if state_of_birth=="CAM";
replace statemx = 5 if state_of_birth=="COA";
replace statemx = 6 if state_of_birth=="COL";
replace statemx = 7 if state_of_birth=="CHP";
replace statemx = 8 if state_of_birth=="CHH";
replace statemx = 9 if state_of_birth=="DF";
replace statemx = 10 if state_of_birth=="DUR";
replace statemx = 11 if state_of_birth=="GTO";
replace statemx = 12 if state_of_birth=="GRR";
replace statemx = 13 if state_of_birth=="HID";
replace statemx = 14 if state_of_birth=="JAL";
replace statemx = 15 if state_of_birth=="MEX";
replace statemx = 16 if state_of_birth=="MCH";
replace statemx = 17 if state_of_birth=="MOR";
replace statemx = 18 if state_of_birth=="NAY";
replace statemx = 19 if state_of_birth=="NL";
replace statemx = 20 if state_of_birth=="OAX";
replace statemx = 21 if state_of_birth=="PBL";
replace statemx = 22 if state_of_birth=="QRO";
replace statemx = 23 if state_of_birth=="QRT";
replace statemx = 24 if state_of_birth=="SLP";
replace statemx = 25 if state_of_birth=="SIN";
replace statemx = 26 if state_of_birth=="SON";
replace statemx = 27 if state_of_birth=="TAB";
replace statemx = 28 if state_of_birth=="TAM";
replace statemx = 29 if state_of_birth=="TLX";
replace statemx = 30 if state_of_birth=="VER";
replace statemx = 31 if state_of_birth=="YUC";
replace statemx = 32 if state_of_birth=="ZAC";
gen sex = 1 if gender=="Male";
replace sex = 2 if gender=="Female";
drop gender;

/*This is incorrect but necessary to not drop 1970 from the regressions altogether:*/
*replace year = 1970 if year==1977;
drop if (year>=1977 & year<=1980);
replace year = 1990 if year>=1981 & year<=1990;
replace year = 2000 if year>=1991 & year<=2000;
replace year = 2010 if year>=2001 & year<=2010;
replace year = 2015 if year>=2011 & year<=2015;
collapse (sum) effort_index, by(year age sex statemx);

save "effort_index", replace;
/*this resulting file should be mergable to the main data:*/
*/ 

/*BEGIN DATA ASSEMBLY:*/
/*Getting the GDP data ready for use as labor demand controls:*/
#delimit;
clear;
use "populationgdp_chen.dta", clear;
encode state, gen(statemx);
/*empty d<year> placeholders for 1940-2015, giving reshape a third stub*/
for num 1940/2015:  gen dX=.;
reshape long p g d, i(statemx) j(realyear);
replace g = g*1000 if realyear>=2003;
drop d*;
gen year=realyear;
sort year;
merge m:1 year using "Millman_1960-2015.dta";
sort statemx year;
/*per capita GDP by Mexican state*/
gen pcgdpmxs=g/p*1000;
rename p popmxs;
rename g gdpmxs;
/*one-off rescaling of a single state-year value*/
replace pcgdpmxs = pcgdpmxs/10 if statemx==4 & realyear==1985;

drop state year hours apprehensions popmxs gdpmxs _merge;
/*predicting what US GDP pc would have been in the absence of the great recession, simple ts w quadratic fit in time (estimated on years before 2006):*/
gen realyear_sq = realyear^2;
regr pcgdpus realyear realyear_sq if ( realyear<2006);
predict pcgdpus_hat;

/*Interpolating Mexican State pc GDP:*/
/*calculating the log ratio of Mexican State to US GDP per capita, with actual and with fitted US GDP:*/
gen gdprat=pcgdpmxs/pcgdpus;
gen gdprat_hat=pcgdpmxs/pcgdpus_hat;
gen lgdprat1=log(gdprat);
gen lgdprat_hat1=log(gdprat_hat);
gen lpcgdpus1 = log(pcgdpus);
gen lpcgdpmxs1 = log(pcgdpmxs);
ipolate lgdprat1 realyear , by(statemx) gen(lgdprat);
ipolate lgdprat_hat1 realyear , by(statemx) gen(lgdprat_hat);
ipolate lpcgdpus1 realyear , by(statemx) gen(lpcgdpus);
ipolate lpcgdpmxs1 realyear , by(statemx) gen(lpcgdpmxs);
gen birthyr=realyear-16;


/*FIGURE 13 PANEL A: picture of actual and predicted GDP during the GR*/
preserve;
drop if realyear<1960;
sort realyear;
label var pcgdpus "US Per Capita GDP" ;
label var pcgdpus_hat "Quadratic Fit";
tw line pcgdpus realyear || line pcgdpus_hat realyear, lpattern(dash) title("Actual and Simulated GDP pc during Great Recession");

/*FIGURE 13 PANEL B: picture of actual and predicted log GDP ratios during the GR*/
collapse lgdprat lgdprat_hat, by(realyear);
sort realyear;
drop if realyear<2000;
drop if realyear>2010;
label var lgdprat "Actual log MX/US GDP Ratio";
label var lgdprat_hat "Simulated Ratio";
tw line lgdprat realyear || line lgdprat_hat realyear , lpattern(dash) title("Actual and Counterfactual Log GDP pc Ratios") subtitle("During Great Recession")
	xtitle("Year");
/*close the preserve opened for the two panels*/
restore;

/*Switch back to line-delimited mode. The code that follows is written without
  semicolons; with "#delimit ;" still active it would all be read as one
  unterminated comment/command and never run.*/
#delimit cr

******************************************************************************************************************
******************************************************************************************************************
* The following code produces the left panel of Figure 18 of this paper. We use CPS data from year 1962 -2015.

* Build the CPS wage sample ("1.dta"): working-age wage earners with valid
* education, hours and weeks information, restricted to full-time workers.
use "cps6215.dta", clear

* Sample restrictions: ages 18-64, positive and non-topcode wage income,
* valid education codes, valid usual hours (from 1976 on), positive weights,
* valid weeks worked.
drop if age<18
drop if age>64
drop if incwage==0
drop if incwage>9999990
drop if educ==0 | educ==1 | educ==999
drop if uhrsworkly>900 & year>=1976
drop if uhrsworkly<=0 & year>=1976
drop if wtsupp<=0
drop if wkswork2==0 
drop if wkswork2==9 
drop if wkswork1<=0 
drop if hig==999
* Years of schooling, first from the highest-grade codes (hig) ...
gen years=0
replace years=9 if hig<=121
replace years=10 if hig==130 | hig==131
replace years=11 if hig==140 | hig==141
replace years=12 if hig==150 | hig==151
replace years=13 if hig==160 | hig==161
replace years=14 if hig==170 | hig==171
replace years=15 if hig==180 | hig==181
replace years=16 if hig==190 | hig==191
replace years=17 if hig==200 | hig==201
replace years=18 if hig==210
tab years if year<=1991
tab years if year>1991
* ... then overwritten from educ99 wherever educ99 matches one of the codes
* below. The order matters: these replaces must come after the hig recode.
* NOTE(review): educ99<=6 is also true for any educ99 value of 0-6 in years
* where hig is available - confirm educ99 is missing (not 0) in those years.
replace years=9 if educ99<=6
replace years=10 if educ99==7
replace years=11 if educ99==8
replace years=12 if educ99==9 | educ99==10
replace years=13.5 if educ99==11
replace years=14 if educ99==12 | educ99==13 | educ99==14
replace years=16 if educ99==15
replace years=17.5 if educ99==16 | educ99==17
replace years=18 if educ99==18
tab years if year>1991
* Through 1975 weeks worked are taken from the intervalled variable wkswork2,
* using one fixed value per interval.
replace wkswork1 = 6.5  if wkswork2==1 & year<=1975
replace wkswork1 = 20   if wkswork2==2 & year<=1975
replace wkswork1 = 33   if wkswork2==3 & year<=1975
replace wkswork1 = 43.5 if wkswork2==4 & year<=1975
replace wkswork1 = 48.5 if wkswork2==5 & year<=1975
replace wkswork1 = 51   if wkswork2==6 & year<=1975

*CREATE POTENTIAL LABOR MARKET EXPERIENCE (FOLLOWING BEAUDRY GREEN & SAND DEFINITION)
* exp = min(age - years - 6, age - 16), floored at 0 and capped at 39.
gen exp=age-years-6
replace exp=age-16 if age-16<exp
replace exp=0 if exp<0
keep if exp>=0 & exp<=39
* Ten-year experience groups 1-4.
* NOTE(review): years can be 13.5 or 17.5, so exp can be 9.5, 19.5 or 29.5;
* those values fall between the integer bounds below, get exper==0 and are
* dropped by the next line. Confirm this is intended.
gen exper = 1*(exp>=0 & exp<=9) + 2*(exp>=10 & exp<=19) + 3*(exp>=20 & exp<=29) + 4*(exp>=30 & exp<=39)
drop if exper==0 
drop if educ==999
* Five education groups: <12, 12, 13-15, 16, >16 years.
gen edu = 1*(years<=11) + 2*(years==12) + 3*(years>12 & years<16) + 4*(years==16) + 5*(years>16) 
drop if edu==0 
* Indicator variables built from birthplace/citizenship codes and race.
gen foreign=0
replace foreign=1 if bpl>14900 & citizen>1 
gen mex=0
replace mex=1 if bpl==20000
gen black=race==200
drop if wkswork1==.
* Attach the yearly price index used to deflate earnings.
merge m:1 year using "pce.dta"
drop _m

* Full-time sample: weeks-worked intervals 4-6 and 35-99 usual weekly hours.
* Before 1976 usual hours are set to 40 for everyone.
keep if wkswork2==4 | wkswork2==5 | wkswork2==6
replace uhrsworkly=40 if year<1976
keep if uhrsworkly>=35 & uhrsworkly<=99
gen hours = wkswork1*uhrsworkly
* Weekly earnings deflated by pce, rescaled by the constant 115.784.
gen earn = (incwage/wkswork1)/(pce/115.784)
gen lnw = log(earn)
* Drop class-of-worker codes 10, 13 and 14.
for any 10 13 14 : drop if classwkr==X
save "1.dta", replace
* Fixed hours weights: share of each experience-sex-race group in total hours
* of its year, averaged over all years and saved to "2.dta".
collapse (sum) hours [aw=wtsupp], by(year exper edu sex black)
egen thrs=total(hours), by(year)
egen ghrs=total(hours), by(year exper sex black)
gen mhrs=ghrs/thrs
collapse (mean) mhrs, by(exper sex black)
sort exper sex black
save "2.dta", replace

* Attach the fixed weights to the person-level sample.
use "1.dta"
sort exper sex black
merge m:1 exper sex black using "2.dta"
drop _merge
drop if edu==.

* Two skill groups: 1 = education group 1, 2 = groups 4 and 5 combined.
* Groups 2 and 3 are left out.
drop if inlist(edu, 2, 3)
replace edu=2 if inlist(edu, 4, 5)
collapse (mean) lnw earn [aw=mhrs], by(year edu)
reshape wide lnw earn, i(year) j(edu)

* Wage gap: mean log wage of group 2 minus group 1.
gen g1=lnw2-lnw1
save "w.dta", replace
* Reference earnings by year ("3.dta"): weeks-weighted mean weekly earnings of
* the cell years==12, sex==1, black==0 with 8-12 years of experience.
use "1.dta", clear
replace wtsupp=wtsupp*wkswork1
keep if years==12 & sex==1 & black==0
keep if exp>=8 & exp<=12
collapse (mean) earn [aw=wtsupp], by(year)
ren earn earn_hs
sort year
save "3.dta", replace

*GET WEEKLY EARNINGS FOR FULL TIME WORKERS BY EDUCATION-EXPERIENCE-RACE-SEX CELLS
* Earnings relative to the reference group, averaged within each cell over
* all years and saved to "4.dta".
use "1.dta", clear
sort year
* Explicit m:1 merge replaces the old-style "merge year using" syntax, which
* current Stata accepts only for backward compatibility.
merge m:1 year using "3.dta"
drop _merge
gen relearn = earn/earn_hs
collapse (mean) relearn, by(exp edu sex black)
sort exp edu sex black
save "4.dta", replace
  
*GET DATA FOR LABOR SUPPLY CONSTRUCTION, FORMING EDUCATION-EXPERIENCE-SEX-RACE CELLS
* ho1 = log supply of group 2 (edu 4/5) minus log supply of group 1 (edu 1), by year,
* with each worker weighted by the cell's relative earnings (relearn).
use "1.dta", clear
sort exp edu sex black
merge m:1 exp edu sex black using "4.dta"
drop _merge
drop if inlist(edu, 2, 3)
replace edu = 2 if inlist(edu, 4, 5)
collapse (sum) wtsupp [aw=relearn], by(year edu)
generate lnh = ln(wtsupp)
reshape wide lnh wtsupp, i(year) j(edu)
generate ho1 = lnh2 - lnh1
save "h1.dta", replace

* Counterfactual relative supply (ho2). Group 1 pools edu 1 and 2, group 2 pools edu 4 and 5,
* edu 3 is left out. From 2008 on, group 1's foreign-born supply is replaced by its 2007
* level extrapolated at the 1994-2007 growth rate; everything else is the observed supply.
use "1.dta", clear
sort exp edu sex black
merge m:1 exp edu sex black using "4.dta"
drop _merge
drop if edu==3
replace edu = 1 if edu==2
replace edu = 2 if inlist(edu, 4, 5)

collapse (sum) wtsupp (mean) foreign [aw=relearn], by(year edu)

* Foreign-born supply of group 1 in the two anchor years, copied onto every group-1 row
generate fb1994 = wtsupp*foreign if year==1994
egen base1994 = mean(fb1994) if edu==1
generate fb2007 = wtsupp*foreign if year==2007
egen base2007 = mean(fb2007) if edu==1

* Native supply, and the extrapolated foreign-born supply (13 = 2007 - 1994)
generate native = wtsupp*(1-foreign)
generate imm_cf = base2007*((base2007/base1994)^((year-2007)/13)) if year>=2008
* Observed supply through 2007 and for group 2; native + extrapolated foreign-born otherwise
generate h = cond(year<=2007 | edu==2, wtsupp, native+imm_cf)
drop fb1994 base1994 fb2007 base2007 native imm_cf foreign wtsupp
reshape wide h, i(year) j(edu)
generate ho2 = log(h2/h1)

* Bring together the supply (h1.dta), wage gap (w.dta) and unem.dta series by year;
* only years present in unem.dta and in the data in memory survive the last merge.
merge 1:1 year using "h1.dta"
drop _merge
merge 1:1 year using "w.dta"
drop _merge
merge 1:1 year using "unem.dta"
keep if _merge==3
drop _merge
* Trend terms: t counts rows in their current order, t2 is its square,
* a flags years after 2007, b is the post-2007 trend interaction
generate t = _n
generate t2 = t^2
generate a = year>2007
generate b = t*a

* Regressions of the wage gap g1 on relative supply ho1 and trend terms, each exported
* with outreg2 into table.dta. rate and rmin come from unem.dta (presumably an
* unemployment rate and a minimum-wage series - confirm); rmin enters in logs.
replace rmin = ln(rmin)

* (1) linear trend, sample through 2007
regress g1 ho1 t if year<=2007
outreg2 using table.dta, e(rmse) bdec(3) tdec(3) replace keep(ho1 t t2 b) 
* (2) linear trend, all years
regress g1 ho1 t
outreg2 using table.dta, e(rmse) bdec(3) tdec(3)  keep(ho1 t t2 b)
* (3) adds the post-2007 trend interaction
regress g1 ho1 t b
outreg2 using table.dta, e(rmse) bdec(3) tdec(3)  keep(ho1 t t2 b)
* (4) quadratic trend
regress g1 ho1 t t2
outreg2 using table.dta, e(rmse) bdec(3) tdec(3)  keep(ho1 t t2 b)
* (5) adds rate and rmin
regress g1 ho1 t rate rmin
outreg2 using table.dta, e(rmse) bdec(3) tdec(3)  keep(ho1 t t2 b rate rmin)
* (6) as (5) but without relative supply
regress g1 t rate rmin
outreg2 using table.dta, e(rmse) bdec(3) tdec(3)  label excel keep(ho1 t t2 b rate rmin)
* Fit the through-2007 model, then plot the observed gap against the fitted gap (y1)
* and the gap implied by the counterfactual supply ho2 (y2).
* NOTE(review): -replace- on this outreg2 call overwrites any table.dta output written
* by earlier outreg2 calls - confirm that is intended.
regress g1 ho1 t if year<=2007
outreg2 using table.dta, e(rmse) bdec(3) tdec(3) replace label excel keep(ho1 t t2)
predict y1
* Same coefficients applied to the counterfactual supply series
generate y2 = ho2*_b[ho1] + t*_b[t] + _b[_cons]
drop if year==2014
* NOTE(review): lp(line) may not be a recognised line pattern name (solid?) - confirm.
twoway (line g1 year,  lp(shortdash) lc(blue) ) ///
       (line y1 year,  lp(line) lc(red) ) ///
       (line y2 year,  lp(longdash) lc(black) ) , ///
       xline(2007, lwidth(0.5) lpattern(dash) lcolor(pink) ) ///
       legend(label(1 Observed relative wage) ///
              label(2 Katz-Murphy predicted wage gap) ///
              label(3 Counterfactual predicted wage gap) ///
              ring(10) position(6) col(1) size(small) ) ///
       xtitle("Year") ytitle("High-Skill, Low-Skill Wage Gap") title("") ///
       xlab(1965(10)2015, nogrid) ylab(0.3(0.2)1.3, angle(h) nogrid) ///
       graphregion(color(white) )
graph export "a1.pdf", as(pdf) replace

*****************************************************************************************************************
*****************************************************************************************************************
*The following code produces the right panel of Figure 18 of this paper.

* Composition-adjusted log wage gap (g1) by year, here with the broader low-skill group:
* edu 1 and 2 (years<=12) form group 1, edu 4 and 5 (years>=16) form group 2, edu 3 is dropped.
* -clear- is required: the data in memory was modified (and not saved) by the code that
* drew the previous figure, so a plain -use- stops with r(4).
use "1.dta", clear
sort exper sex black
merge m:1 exper sex black using "2.dta"
drop _merge
drop if edu==.
drop if edu==3
replace edu=1 if edu==2
replace edu=2 if edu==4 | edu==5 
collapse (mean) lnw earn [aw=mhrs], by(year edu)
reshape wide lnw earn, i(year) j(edu)

*RESTATE EARNINGS RELATIVE TO COLLEGE GRADS
* g1 = mean log wage of group 2 minus mean log wage of group 1

gen g1=lnw2-lnw1
save "w.dta", replace

*GET WEEKLY EARNINGS FOR FULL TIME WORKERS BY EDUCATION-EXPERIENCE-RACE-SEX CELLS
* relearn = each worker's earn relative to that year's reference series earn_hs,
* then averaged (unweighted, over all years) within exp-edu-sex-black cells.

use "1.dta", clear
sort year 
* "3.dta" holds one row per year, so this is a many-to-one match; the explicit m:1 form
* replaces the old pre-Stata-11 -merge varlist using- syntax and matches the rest of the file.
merge m:1 year using "3.dta"
gen relearn = earn/earn_hs
collapse (mean) relearn, by(exp edu sex black)
sort exp edu sex black
save "4.dta", replace
  
*GET DATA FOR LABOR SUPPLY CONSTRUCTION, FORMING EDUCATION-EXPERIENCE-SEX-RACE CELLS
* ho1 = log supply of group 2 (edu 4/5) minus log supply of group 1 (edu 1), by year,
* with each worker weighted by the cell's relative earnings (relearn).
use "1.dta", clear
sort exp edu sex black
merge m:1 exp edu sex black using "4.dta"
drop _merge
drop if inlist(edu, 2, 3)
replace edu = 2 if inlist(edu, 4, 5)
collapse (sum) wtsupp [aw=relearn], by(year edu)
generate lnh = ln(wtsupp)
reshape wide lnh wtsupp, i(year) j(edu)
generate ho1 = lnh2 - lnh1
save "h1.dta", replace

* Counterfactual relative supply (ho2). Group 1 pools edu 1 and 2, group 2 pools edu 4 and 5,
* edu 3 is left out. From 2008 on, group 1's foreign-born supply is replaced by its 2007
* level extrapolated at the 1994-2007 growth rate; everything else is the observed supply.
use "1.dta", clear
sort exp edu sex black
merge m:1 exp edu sex black using "4.dta"
drop _merge
drop if edu==3
replace edu = 1 if edu==2
replace edu = 2 if inlist(edu, 4, 5)

collapse (sum) wtsupp (mean) foreign [aw=relearn], by(year edu)

* Foreign-born supply of group 1 in the two anchor years, copied onto every group-1 row
generate fb1994 = wtsupp*foreign if year==1994
egen base1994 = mean(fb1994) if edu==1
generate fb2007 = wtsupp*foreign if year==2007
egen base2007 = mean(fb2007) if edu==1

* Native supply, and the extrapolated foreign-born supply (13 = 2007 - 1994)
generate native = wtsupp*(1-foreign)
generate imm_cf = base2007*((base2007/base1994)^((year-2007)/13)) if year>=2008
* Observed supply through 2007 and for group 2; native + extrapolated foreign-born otherwise
generate h = cond(year<=2007 | edu==2, wtsupp, native+imm_cf)
drop fb1994 base1994 fb2007 base2007 native imm_cf foreign wtsupp
reshape wide h, i(year) j(edu)
generate ho2 = log(h2/h1)

* Bring together the supply (h1.dta), wage gap (w.dta) and unem.dta series by year;
* only years present in unem.dta and in the data in memory survive the last merge.
merge 1:1 year using "h1.dta"
drop _merge
merge 1:1 year using "w.dta"
drop _merge
merge 1:1 year using "unem.dta"
keep if _merge==3
drop _merge
* Trend terms: t counts rows in their current order, t2 is its square,
* a flags years after 2007, b is the post-2007 trend interaction
generate t = _n
generate t2 = t^2
generate a = year>2007
generate b = t*a

* Regressions of the wage gap g1 on relative supply ho1 and trend terms, each exported
* with outreg2 into table.dta. rate and rmin come from unem.dta (presumably an
* unemployment rate and a minimum-wage series - confirm); rmin enters in logs.
replace rmin = ln(rmin)

* (1) linear trend, sample through 2007
regress g1 ho1 t if year<=2007
outreg2 using table.dta, e(rmse) bdec(3) tdec(3) replace keep(ho1 t t2 b) 

* (2) linear trend, all years
regress g1 ho1 t
outreg2 using table.dta, e(rmse) bdec(3) tdec(3)  keep(ho1 t t2 b)
* (3) adds the post-2007 trend interaction
regress g1 ho1 t b
outreg2 using table.dta, e(rmse) bdec(3) tdec(3)  keep(ho1 t t2 b)
* (4) quadratic trend
regress g1 ho1 t t2
outreg2 using table.dta, e(rmse) bdec(3) tdec(3)  keep(ho1 t t2 b)
* (5) adds rate and rmin
regress g1 ho1 t rate rmin
outreg2 using table.dta, e(rmse) bdec(3) tdec(3)  keep(ho1 t t2 b rate rmin)
* (6) as (5) but without relative supply
regress g1 t rate rmin
outreg2 using table.dta, e(rmse) bdec(3) tdec(3)  label excel keep(ho1 t t2 b rate rmin)
* Fit the through-2007 model, then plot the observed gap against the fitted gap (y1)
* and the gap implied by the counterfactual supply ho2 (y2).
* NOTE(review): -replace- on this outreg2 call overwrites any table.dta output written
* by earlier outreg2 calls - confirm that is intended.
regress g1 ho1 t if year<=2007
outreg2 using table.dta, e(rmse) bdec(3) tdec(3) replace label excel keep(ho1 t t2)
predict y1
* Same coefficients applied to the counterfactual supply series
generate y2 = ho2*_b[ho1] + t*_b[t] + _b[_cons]
drop if year==2014
* NOTE(review): lp(line) may not be a recognised line pattern name (solid?) - confirm.
twoway (line g1 year,  lp(shortdash) lc(blue) ) ///
       (line y1 year,  lp(line) lc(red) ) ///
       (line y2 year,  lp(longdash) lc(black) ) , ///
       xline(2007, lwidth(0.5) lpattern(dash) lcolor(pink) ) ///
       legend(label(1 Observed relative wage) ///
              label(2 Katz-Murphy predicted wage gap) ///
              label(3 Counterfactual predicted wage gap) ///
              ring(10) position(6) col(1) size(small) ) ///
       xtitle("Year") ytitle("High-Skill, Low-Skill Wage Gap") title("") ///
       xlab(1965(10)2015, nogrid) ylab(0.3(0.2)1.3, angle(h) nogrid) ///
       graphregion(color(white) )
graph export "a2.pdf", as(pdf) replace

********************************************************************************************************************************
********************************************************************************************************************************
* To produce Figure 19, we replicate the above code, but restrict the sample to individuals who work in low-skill industries.
* We select these individuals using the following industry codes:

* Keep only workers whose industry code (ind1950) is 105, 246, 679, 406-489 or 826-849.
* NOTE(review): as placed, these two lines run on the year-level data left in memory by
* the preceding figure code; they presumably belong right after the person-level sample
* is loaded when the code above is re-run - confirm.
rename ind1950 ind
keep if inlist(ind, 105, 246, 679) | inrange(ind, 406, 489) | inrange(ind, 826, 849)

 



	